
// Weighting the Evidence: A Rank-Dependent Model of Outdoor Recreation, June 2024
// This do-file cleans and formats the raw 2022 summer flounder survey data.

* ------------------------------------------------------------------
* Stage 1: load the raw survey file and expand it to one row per
*          respondent (qtid) x choice question (1-6) x alternative (A/B/C).
* ------------------------------------------------------------------

* New variables default to double. NOTE(review): "permanently" also stores
* this preference for future Stata sessions on this machine.
set type double, permanently
use rawdata.dta, clear

* Remove records flagged a1_none==1.
drop if a1_none==1

* s1 and s2 each add up five c_* items; they differ only in the fluke item
* (c_fluke_ vs c_fluke1) and the scup item (c_scup_1 vs c_scup_2).
* Records scoring 5 are removed: s1 is applied to WEB records, s2 to MAIL.
gen s1 = c_fluke_ + c_trip_c + c_scup_b + c_scup_1 + c_fluke2
gen s2 = c_fluke1 + c_trip_c + c_scup_b + c_scup_2 + c_fluke2
drop if s1==5 & type=="WEB"
drop if s2==5 & type=="MAIL"

* Six copies of every record, numbered 1..6 within qtid (one per question).
expand 6
bysort qtid: gen question = _n

* identifier = row position at this point, i.e. one value per
* qtid x question combination.
gen identifier = _n

* Three copies of every question row, labelled A, B and C.
expand 3
bysort qtid question: gen idx = _n
gen trip = substr("ABC", idx, 1)
drop idx

* chosen starts at 0 for every alternative; it is filled in afterwards.
gen chosen = 0
order qtid version question trip chosen
sort qtid version question trip

* ------------------------------------------------------------------
* Stage 2: code the choice indicator and discard unusable questions.
*          b1..b6 hold the answer to questions 1..6 as strings.
* ------------------------------------------------------------------

* Mark the alternative whose letter equals the recorded answer.
* trip only takes the values "A", "B" and "C", so a single equality test
* covers the three cases.
forvalues q = 1/6 {
    replace chosen = 1 if question==`q' & trip==b`q'
}

* Records with no answer to any of the six questions carry no choice
* information: set chosen to missing.
* (The previous code assigned missing(chosen), which evaluates to 0 because
* chosen is never missing at this point, and repeated the same condition
* twice. These rows are removed by the loop below in either case.)
replace chosen = . if b1=="" & b2=="" & b3=="" & b4=="" & b5=="" & b6==""

* Drop a question (all three of its rows) when its answer is "9" or empty.
* NOTE(review): "9" is presumably an invalid/no-answer code - confirm
* against the survey codebook.
forvalues q = 1/6 {
    drop if question==`q' & (b`q'=="9" | missing(b`q'))
}

* ------------------------------------------------------------------
* Stage 3: keep the variables used downstream, attach the attributes
*          of each alternative from MasterKey.dta, and save the result.
* ------------------------------------------------------------------
keep qtid version question chosen trip identifier                     ///
     b1 b2 b3 b4 b5 b6                                                ///
     a1_scup a1_summe a1_black a1_none a2_summe a2_black a2_scup      ///
     a3_party a3_chart a3_shore a3_priva a5                           ///
     d1 d2 d3 d4                                                      ///
     comments complete surveyda intervie type registra                ///
     lic_st1 lic_st2 lic_st3 lic_st4                                  ///
     licname1 licname2 licname3 licname4 lic_st                       ///
     city state zip selectio sampling s1 s2

* Many survey rows share one key row per version x question x trip.
* NOTE(review): unmatched rows from either side are retained without any
* check - confirm every survey row finds its key row.
merge m:1 version question trip using "MasterKey.dta", nogenerate

sort qtid version question trip
order qtid identifier version question trip
save cleandata.dta, replace

* ------------------------------------------------------------------
* Stage 4: respondent covariates and "other species" attributes.
*          The order in which variables are created is kept as is,
*          because a later drop uses positional variable ranges.
* ------------------------------------------------------------------

* 1 for alternative C, 0 for A and B.
gen opt_out = (trip=="C")

* avidity: sum of the four a3_* items (missing only if all four are
* missing), divided by the largest sum in the data.
egen avidity = rowtotal(a3_party a3_chart a3_shore a3_priva), missing
egen max_avidity = max(avidity)
replace avidity = avidity/max_avidity

* male copies d1, except that code 2 becomes 0.
gen male = cond(d1==2, 0, d1)

* registra is parsed as a day-month-year date; age is measured at
* 30 June 2022 and divided by 100.
gen birthday = date(registra, "DMY")
gen age = age(birthday, td(30jun2022))/100

* Income dummies from d4 (medium: 4-6, high: 7 and above); missing d4 stays missing.
gen income_medium = (d4>3 & d4<7) if !mi(d4)
gen income_high   = (d4>6)        if !mi(d4)

* Education dummies from d3 (college: 3-5, graduate: 6 and above); missing d3 stays missing.
gen education_college  = (d3>2 & d3<6) if !mi(d3)
gen education_graduate = (d3>5)        if !mi(d3)

* Black sea bass and scup combined, divided by 100:
* kept fish, and released fish (= caught - kept).
gen keepOther = (keep_bsb+keep_scup)/100
gen releaseOther = (catch_bsb+catch_scup)/100 - keepOther

  * ------------------------------------------------------------------
* Stage 5: for every outcome slot k = 0..8 fill in
*            keep_fluke_k     fluke kept in that outcome
*            p_keep_fluke_k   probability of that outcome
*            release_fluke_k  fluke released (catch_fluke_max - kept, floored at 0)
*          and finally divide keep_fluke_k and release_fluke_k by 100.
* ------------------------------------------------------------------

* Row conditions used repeatedly below (alternative C is always excluded).
local certain   `"(keep_fluke_max==keep_fluke_min & trip!="C")"'
local risky     `"(keep_fluke_distribution=="Yes" & trip!="C")"'
local ambiguous `"(trip!="C" & keep_fluke_range=="Yes" & keep_fluke_distribution=="No")"'

forvalues k = 0/8 {

    * Trips with a single keep level (min equals max): slot 0 carries that
    * level with probability 1; every other slot is set to zero.
    if `k'==0 {
        replace keep_fluke_`k' = keep_fluke_max if `certain'
        replace p_keep_fluke_`k' = 1 if `certain'
        gen release_fluke_`k' = (catch_fluke_max-keep_fluke_`k') if `certain'
    }
    else {
        replace keep_fluke_`k' = 0 if `certain'
        replace p_keep_fluke_`k' = 0 if `certain'
        gen release_fluke_`k' = .
        replace release_fluke_`k' = 0 if `certain'
    }

    * Risky trips, i.e. trips with a given probability distribution.
    replace release_fluke_`k' = catch_fluke_max-keep_fluke_`k' if `risky'

    * Ambiguous trips (keep specified as a range but without an explicit
    * distribution): a discrete uniform distribution over the range is
    * assumed here. When necessary, this issue is dealt with in the Matlab
    * code for the different models.
    replace keep_fluke_`k' = `k' if `ambiguous'
    replace p_keep_fluke_`k' = 0 if `ambiguous'
    replace p_keep_fluke_`k' = 1/(keep_fluke_max-keep_fluke_min+1) if (`k'>=keep_fluke_min & `k'<=keep_fluke_max) & `ambiguous'
    replace release_fluke_`k' = catch_fluke_max-keep_fluke_`k' if `ambiguous'

    * Alternative C: everything is zero.
    replace keep_fluke_`k' = 0 if trip=="C"
    replace p_keep_fluke_`k' = 0 if trip=="C"
    replace release_fluke_`k' = 0 if trip=="C"

    * Released fish cannot be negative.
    replace release_fluke_`k' = 0 if release_fluke_`k'<0

    * Divide by 100. Each pass only touches its own k-indexed variables,
    * so rescaling here gives the same result as a separate pass afterwards.
    replace keep_fluke_`k' = keep_fluke_`k'/100
    replace release_fluke_`k' = release_fluke_`k'/100
}
 
* ------------------------------------------------------------------
* Stage 6: zero out the attributes of alternative C, rescale the
*          remaining attributes, and arrange the leading columns.
* ------------------------------------------------------------------

* keepOther / releaseOther were already divided by 100 when created;
* only alternative C needs to be set to zero.
replace keepOther = 0 if trip=="C"
replace releaseOther = 0 if trip=="C"

* cost is divided by 1000 (0 for alternative C).
replace cost = cost/1000 if trip!="C"
replace cost = 0 if trip=="C"

* Catch and keep attributes are divided by 100 (0 for alternative C).
foreach attr of varlist catch_fluke_min catch_fluke_max keep_fluke_min keep_fluke_max keep_bsb keep_scup catch_bsb catch_scup {
    replace `attr' = cond(trip=="C", 0, `attr'/100)
}

* Released fish per species = caught - kept, on the rescaled values.
gen release_bsb = catch_bsb - keep_bsb
gen release_scup = catch_scup - keep_scup

* Leading columns: covariates and main attributes first, followed by
* releaseOther and keepOther, then everything else in its existing order.
order age income_medium income_high education_college education_graduate avidity male cost identifier chosen opt_out release_bsb keep_bsb catch_scup releaseOther keepOther

* ------------------------------------------------------------------
* Stage 7: final sample restrictions, choice-set ID, alternative index,
*          and export of the estimation data set.
* ------------------------------------------------------------------

* Drop rows with any missing demographic covariate.
egen missdemographics = rowmiss(age income_medium income_high education_college education_graduate avidity male)
drop if missdemographics>0

* Drop every choice set (identifier) that contains at least one ambiguous
* alternative, i.e. a keep range given without a distribution.
gen rng = cond(keep_fluke_range=="Yes" & keep_fluke_distribution=="No",1,0)
bysort identifier: egen yes_rng = max(rng)
drop if yes_rng==1

* Consecutive choice-set ID (1, 2, 3, ...) for the remaining identifiers.
egen ID = group(identifier)

* Replace the letter code in trip by a numeric index within each choice set.
* The rows are ordered by the original letter, so that 1/2/3 always
* corresponds to A/B/C. Stata's sort does not keep tied observations in a
* fixed order, so numbering the rows in whatever order bysort ID leaves them
* would tie neither the index nor the row order to the alternative, and
* would not be reproducible from run to run.
tempvar letter
gen `letter' = trip
drop trip
bysort ID (`letter'): gen trip = _n
drop `letter'

* Remove everything that is not part of the estimation data set.
* NOTE: the two "a-b" entries are positional ranges and depend on the
* current variable order.
drop version-slimit_scup max_avidity-birthday catch_fluke_range keep_fluke_range keep_fluke_distribution identifier missdemographics rng yes_rng s1 s2 type

export excel using "ReadyDataFluke2022", firstrow(variables) replace
save "ReadyDataFluke2022.dta", replace


